# Pin the CRAN mirror so install.packages() has a repository to use without
# asking for one.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Clear the console only when a person is at the keyboard; in a rendered or
# batch run the form-feed character would just be written into the output.
if (interactive()) {
  cat("\014")
}

# The workspace is intentionally NOT wiped with rm(list = ls()): every object
# this script needs is created below, and clearing the global environment
# would destroy unrelated objects in the caller's session.

# Install plotly only when it is missing, instead of re-downloading it on
# every run.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
## 
## The downloaded binary packages are in
##  /var/folders/06/y6vmvyfj0wg08vb3rszcfy080000gn/T//RtmpGyMWrR/downloaded_packages
# Read the checkout records CSV (hosted on S3) into a base data frame.
checkouts_url <- "https://seattle-library-checkout-data.s3.us-west-2.amazonaws.com/norton-anthology_spl-checkouts_2005-2025.csv"
df <- read.csv(file = checkouts_url)
library("tidyverse")
## Warning: package 'tibble' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("plotly")
## Warning: package 'plotly' was built under R version 4.3.3
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Ten creators with the largest summed checkouts, largest first.
# Step 1: one row per creator holding the sum of Checkouts (NAs ignored).
author_totals <- summarise(
  group_by(df, Normalized.Creator),
  TotalCheckouts = sum(Checkouts, na.rm = TRUE)
)
# Step 2: sort descending and keep the first ten rows.
top_authors <- author_totals %>%
  arrange(desc(TotalCheckouts)) %>%
  head(n = 10)

print("Top 10 Most Checked Out Authors:")
## [1] "Top 10 Most Checked Out Authors:"
print(top_authors)
## # A tibble: 10 × 2
##    Normalized.Creator TotalCheckouts
##    <chr>                       <int>
##  1 Ursula K. Le Guin           73224
##  2 Octavia E. Butler           65386
##  3 Louise Erdrich              60846
##  4 N. K. Jemisin               59859
##  5 Toni Morrison               47501
##  6 Kurt Vonnegut               41462
##  7 George Saunders             38838
##  8 Philip K. Dick              38230
##  9 Sherman Alexie              37477
## 10 James Baldwin               32463
# Ten titles with the largest summed checkouts, largest first.
# Aggregate Checkouts per title (NAs ignored) ...
checkouts_by_title <- df %>%
  group_by(Normalized.Title) %>%
  summarise(TotalCheckouts = sum(Checkouts, na.rm = TRUE))
# ... then sort descending and keep the first ten rows.
top_titles <- head(arrange(checkouts_by_title, desc(TotalCheckouts)), n = 10)

print("Top 10 Most Checked Out Titles:")
## [1] "Top 10 Most Checked Out Titles:"
print(top_titles)
## # A tibble: 10 × 2
##    Normalized.Title                                TotalCheckouts
##    <chr>                                                    <int>
##  1 Parable Of The Sower                                     21652
##  2 Lincoln In The Bardo                                     17356
##  3 The Fifth Season                                         17223
##  4 The Sympathizer                                          12792
##  5 Kindred                                                  12591
##  6 Beloved                                                  12330
##  7 The Left Hand Of Darkness                                12148
##  8 The Absolutely True Diary Of A Part Time Indian          12043
##  9 The Year Of Magical Thinking                             10907
## 10 The Sentence                                             10658
# Horizontal bar chart of the top authors, bars ordered by total checkouts.
# reorder() sorts the author factor by TotalCheckouts so bars appear ranked;
# coord_flip() turns the bars sideways so the names are readable.
ggplot(
  data = top_authors,
  mapping = aes(
    x = reorder(Normalized.Creator, TotalCheckouts),
    y = TotalCheckouts
  )
) +
  geom_col(fill = "steelblue") +
  labs(title = "Top 10 Checked Out Authors", x = "Author", y = "Total Checkouts") +
  coord_flip()

# Horizontal bar chart of the top titles, bars ordered by total checkouts.
# reorder() sorts the title factor by TotalCheckouts so bars appear ranked;
# coord_flip() turns the bars sideways so the titles are readable.
ggplot(
  data = top_titles,
  mapping = aes(
    x = reorder(Normalized.Title, TotalCheckouts),
    y = TotalCheckouts
  )
) +
  geom_col(fill = "darkgreen") +
  labs(title = "Top 10 Checked Out Titles", x = "Title", y = "Total Checkouts") +
  coord_flip()

# Add CheckoutDate: a Date built from CheckoutYear and CheckoutMonth, pinned
# to day "01" of that month.
df <- mutate(
  df,
  CheckoutDate = as.Date(paste0(CheckoutYear, "-", CheckoutMonth, "-01"))
)

# Names of the ten creators with the largest summed checkouts.
creator_totals <- summarise(
  group_by(df, Normalized.Creator),
  TotalCheckouts = sum(Checkouts, na.rm = TRUE)
)
creators_ranked <- arrange(creator_totals, desc(TotalCheckouts))
top10_authors <- head(creators_ranked$Normalized.Creator, n = 10)

# Keep only the rows whose creator is one of those ten.
top_authors_df <- filter(df, Normalized.Creator %in% top10_authors)

# One row per (month, author) with that month's summed checkouts; the result
# is returned ungrouped.
author_ts_all <- summarise(
  group_by(top_authors_df, CheckoutDate, Normalized.Creator),
  Checkouts = sum(Checkouts, na.rm = TRUE),
  .groups = "drop"
)

# Author names sorted by their overall total, largest first.
author_grand_totals <- summarise(
  group_by(author_ts_all, Normalized.Creator),
  Total = sum(Checkouts, na.rm = TRUE)
)
author_order <- arrange(author_grand_totals, desc(Total))$Normalized.Creator

# Store the author column as a factor whose levels follow that ranking.
author_ts_all <- mutate(
  author_ts_all,
  Normalized.Creator = factor(Normalized.Creator, levels = author_order)
)

# Line chart of monthly checkouts, one coloured line per author.
# `linewidth` replaces the `size` argument, which ggplot2 deprecated for line
# geoms in 3.4.0 (it previously triggered a deprecation warning here).
authors <- ggplot(author_ts_all, aes(x = CheckoutDate, y = Checkouts, color = Normalized.Creator)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Monthly Checkouts for Top 10 Authors",
    x = "Date", y = "Checkouts", color = "Author(Descending)"
  ) +
  theme_minimal()
# Convert the static ggplot into an interactive plotly widget.
ggplotly(authors)
# (Re)build CheckoutDate: a Date from CheckoutYear and CheckoutMonth, pinned
# to day "01" of that month.
df <- mutate(
  df,
  CheckoutDate = as.Date(paste0(CheckoutYear, "-", CheckoutMonth, "-01"))
)

# Names of the ten titles with the largest summed checkouts.
titles_ranked <- df %>%
  group_by(Normalized.Title) %>%
  summarise(TotalCheckouts = sum(Checkouts, na.rm = TRUE)) %>%
  arrange(desc(TotalCheckouts))
top_10_titles <- head(titles_ranked$Normalized.Title, n = 10)

# Keep only the rows belonging to those ten titles.
df_top_titles <- filter(df, Normalized.Title %in% top_10_titles)

# One row per (title, month) with that month's summed checkouts; the result
# is returned ungrouped.
title_time_series <- summarise(
  group_by(df_top_titles, Normalized.Title, CheckoutDate),
  Checkouts = sum(Checkouts, na.rm = TRUE),
  .groups = "drop"
)

# Title names sorted by their overall total, largest first.
title_grand_totals <- summarise(
  group_by(title_time_series, Normalized.Title),
  Total = sum(Checkouts, na.rm = TRUE)
)
title_order <- arrange(title_grand_totals, desc(Total))$Normalized.Title

# Store the title column as a factor whose levels follow that ranking.
title_time_series <- mutate(
  title_time_series,
  Normalized.Title = factor(Normalized.Title, levels = title_order)
)

# Line chart of monthly checkouts, one coloured line per title.
# `linewidth` replaces the `size` argument, which ggplot2 deprecated for line
# geoms in 3.4.0.
# NOTE(review): this object holds the *titles* plot but reuses the name
# `authors`; the name is kept so existing references still resolve, but
# consider renaming it.
authors <- ggplot(title_time_series, aes(x = CheckoutDate, y = Checkouts, color = Normalized.Title)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Monthly Checkouts for Top 10 Titles",
    x = "Date", y = "Checkouts", color = "Title(Descending)"
  ) +
  theme_minimal() +
  theme(legend.position = "right") # default vertical legend
# Convert the static ggplot into an interactive plotly widget.
ggplotly(authors)
# Monthly checkout totals for Octavia E. Butler: one row per CheckoutDate.
butler_df <- df %>%
  filter(Normalized.Creator == "Octavia E. Butler") %>%
  group_by(CheckoutDate) %>%
  summarise(Checkouts = sum(Checkouts, na.rm = TRUE))

# Line chart of those totals. `linewidth` replaces the `size` argument, which
# ggplot2 deprecated for line geoms in 3.4.0.
butler <- ggplot(butler_df, aes(x = CheckoutDate, y = Checkouts)) +
  geom_line(color = "steelblue", linewidth = 1) +
  labs(title = "Monthly Checkouts: Octavia E. Butler", x = "Date", y = "Checkouts") +
  theme_minimal()
# Convert the static ggplot into an interactive plotly widget.
ggplotly(butler)
# Monthly checkout totals for the title "Parable Of The Sower": one row per
# CheckoutDate.
parable_df <- df %>%
  filter(Normalized.Title == "Parable Of The Sower") %>%
  group_by(CheckoutDate) %>%
  summarise(Checkouts = sum(Checkouts, na.rm = TRUE))

# Static line chart of those totals. `linewidth` replaces the `size`
# argument, which ggplot2 deprecated for line geoms in 3.4.0.
ggplot(parable_df, aes(x = CheckoutDate, y = Checkouts)) +
  geom_line(color = "firebrick", linewidth = 1) +
  labs(title = "Monthly Checkouts: Parable Of The Sower", x = "Date", y = "Checkouts") +
  theme_minimal()

# Monthly checkout totals for Mary Oliver: one row per CheckoutDate.
oliver_df <- df %>%
  filter(Normalized.Creator == "Mary Oliver") %>%
  group_by(CheckoutDate) %>%
  summarise(Checkouts = sum(Checkouts, na.rm = TRUE))

# Static line chart of those totals. `linewidth` replaces the `size`
# argument, which ggplot2 deprecated for line geoms in 3.4.0.
ggplot(oliver_df, aes(x = CheckoutDate, y = Checkouts)) +
  geom_line(color = "darkgreen", linewidth = 1) +
  labs(title = "Monthly Checkouts: Mary Oliver", x = "Date", y = "Checkouts") +
  theme_minimal()